/******************************************************************************
Paper: The Impact of Welfare on Intergroup Relations
Author: Akshay Dixit

Survey: This .do file cleans data from the village survey (i.e., the survey
conducted with an elected village official).

It also produces:
	- Figure 1 (Variation in Caste-Based Inequality in Land Ownership in the Survey Sample)
	- Table S12 (Comparison of lower and higher inequality villages)
	- Table S19 (Comparison of villages using 2023 survey data)
******************************************************************************/

clear all

	// Survey data folder; the global $identity must already be defined by the caller
global data "$identity/data/survey"

********************************************************************************

*** Import data ***

	// Load the village survey data, discarding anything currently in memory
use "$data/Village Survey V2.dta", clear

* Survey respondent (based on information shared by DAI)
*   Villages are matched by their numeric village code; any village not listed
*   in one of the groups below is assigned "sarpanch" at the end.

generate respondent = ""

replace respondent = "upa sarpanch" if inlist(village, 2012, 2021, 2022, 2008, 2025, 2035, ///
	1002, 1026, 1019, 2006, 1009, 1018)

replace respondent = "ward member" if inlist(village, 1027, 2023, 1024, 1028, 2018, 1025, 1015, ///
	2003, 2004, 2001, 2039, 2030, 2007, 2009)

replace respondent = "mpp" if inlist(village, 2029)

replace respondent = "counselor" if inlist(village, 2036, 2033)

	// Everyone still unassigned is a sarpanch
replace respondent = "sarpanch" if respondent == ""

* Correct two data entry errors made by data collectors
*   NOTE(review): both fixes apply to every observation holding the erroneous
*   value, not to a specific village -- confirm that is intended.

replace q5p1 = 250 if q5p1 == 25
replace q2p13 = 10 if q2p13 == 1

********************************************************************************

*** Balance check ***

* Create variables of interest

	// State indicator: 1 when state == 2, 0 otherwise
generate telangana = (state == 2)
tabulate telangana

	// Village characteristics copied or recoded from survey items
	// (each indicator is 0 whenever the item differs from the stated code)
generate castes_in_village = q2p7
generate sarpanch_reserved_sc = (q2p19 == 2)
generate segregated_sc = (q2p18 == 1)
generate govt_procured_crop = (q3p10 == 1)

	// Monetary items are divided by 83; the variable labels report them in USD
	// (presumably 83 is the rupee-per-dollar exchange rate -- TODO confirm)
local per_usd 83
generate rate_leasing_land = q6p7/`per_usd'
generate irrigated_land_price = q6p8/`per_usd'
generate unirrigated_land_price = q6p9/`per_usd'
egen mean_wage = rowmean(q5p1 q5p2 q5p3 q5p4)
replace mean_wage = mean_wage/`per_usd'

	// Negative values in these items are set to missing
foreach item of varlist q2p14 q2p13 q2p10 q2p9 {
	replace `item' = . if `item' < 0
}

	// Ratio of q2p14 to q2p13 (see the variable label assigned below)
generate dominant_land_to_pop = (q2p14/q2p13)

	// Low inequality: ratio below 2; left missing when the ratio is missing
generate low_land_inequality = (dominant_land_to_pop < 2) if !missing(dominant_land_to_pop)
tabulate low_land_inequality

	// String versions of the plurality-caste items, taken from their value labels
decode q2p12, generate(land_plurality_caste)
decode q2p8, generate(pop_plurality_caste)
generate reddy_land = (land_plurality_caste == "Reddy")
tabulate reddy_land if low_land_inequality == 0

* Label variables
*   These labels are reused as the row titles of the balance tables below.

label variable castes_in_village "No. of castes living in village"
label variable dominant_land_to_pop "Ratio of land-to-population percentage of caste owning plurality of land"
label variable low_land_inequality "Low land inequality: Whether the above ratio is less than two"
label variable govt_procured_crop "Govt procured crops in either of past two seasons"
label variable sarpanch_reserved_sc "Sarpanch position reserved for Scheduled Caste"
label variable segregated_sc "Separate colony in village for Scheduled Castes"
label variable rate_leasing_land "Rate for leasing 1 acre of land (USD)"
label variable irrigated_land_price "Price of 1 acre of irrigated land (USD)"
label variable unirrigated_land_price "Price of 1 acre of unirrigated land (USD)"
label variable mean_wage "Mean daily wage for agricultural labor (USD)"

* Balance tables
*
* Both comparisons below use the same table layout, so the export logic lives
* in one helper program that is called once per comparison.

capture program drop export_balance_table
program define export_balance_table

	// Writes a two-group balance table to an Excel workbook.
	//
	//   varlist     outcomes, one table row each, in the order given
	//   group()     0/1 grouping variable
	//   saving()    path of the workbook (replaced if it already exists)
	//   onelabel()  header of column C, the mean where group == 1
	//   zerolabel() header of column D, the mean where group == 0
	//
	// Columns E-G hold the coefficient on group() from a regression of the
	// outcome on group() with robust standard errors, its two-sided p-value,
	// and the regression sample size.
	syntax varlist(numeric), GROUP(varname numeric) SAVING(string) ONELABEL(string) ZEROLABEL(string)

	putexcel set "`saving'", replace
	putexcel B1=("(1)") C1=("(2)") D1=("(3)") E1=("(4)") F1=("(5)") G1=("(6)")
	putexcel C2=("`onelabel'") D2=("`zerolabel'") E2=("Difference") F2=("p-value") G2=("N")

	local row = 3
	local counter = 1

	foreach var of local varlist {

		di "`var'"

			// Pause kept from the original script; presumably it gives the file
			// system time to release the workbook between writes -- confirm
			// before removing
		sleep 2000
		local varlabel : var label `var'

			// Group means
		qui sum `var' if `group' == 1
		local mean_one = (r(mean))

		qui sum `var' if `group' == 0
		local mean_zero = (r(mean))

			// Difference in means and its p-value
		qui reg `var' `group', vce(robust)
		local p = 2*ttail(e(df_r), abs(_b[`group']/_se[`group']))

			// One workbook write per row; means are written as text rounded to
			// two decimals, the remaining statistics as numbers
		putexcel A`row' = ("(`counter')")                              ///
			B`row' = ("`varlabel'")                                    ///
			C`row' = (trim("`: display %10.2f `mean_one''"))           ///
			D`row' = (trim("`: display %10.2f `mean_zero''"))          ///
			E`row' = ((_b[`group']))                                   ///
			F`row' = (`p')                                             ///
			G`row' = (e(N))

		local ++row
		local ++counter

	}

end

* Compare villages across the Telangana and Andhra Pradesh border

export_balance_table castes_in_village dominant_land_to_pop low_land_inequality         ///
	govt_procured_crop sarpanch_reserved_sc segregated_sc rate_leasing_land             ///
	irrigated_land_price unirrigated_land_price mean_wage,                              ///
	group(telangana) saving("$analysis/balance_check_survey.xlsx")                      ///
	onelabel("TS mean") zerolabel("AP mean")


* Compare low vs. high inequality villages

export_balance_table castes_in_village govt_procured_crop sarpanch_reserved_sc          ///
	segregated_sc rate_leasing_land mean_wage,                                          ///
	group(low_land_inequality) saving("$analysis/balance_check_inequality.xlsx")        ///
	onelabel("Low inequality mean") zerolabel("High inequality mean")

********************************************************************************

*** Dominant land-to-population share visualization ***

	// Detailed summary (including percentiles) printed to the log
summarize dominant_land_to_pop, detail

	// Kernel density with a reference line at 2, the cutoff used to define
	// low_land_inequality
kdensity dominant_land_to_pop, xlabel(0(2)14, valuelabel) xscale(range(0 14)) title("") xline(2)
graph export "$analysis/kdensity_land_to_pop.pdf", as(pdf) replace 

	// Inspect villages with a ratio below 0.7 alongside the two items it is built from
list dominant_land_to_pop q2p14 q2p13 if dominant_land_to_pop < 0.7

********************************************************************************

*** Prepare for merge with HH data: Rename and relabel variables to avoid confusion with HH data ***

	// Remove the analysis variables created for the balance checks.
	// telangana, segregated_sc and respondent are not in this list, so they
	// stay in the data and receive the v_ prefix below.
drop castes_in_village sarpanch_reserved_sc govt_procured_crop            ///
	rate_leasing_land irrigated_land_price unirrigated_land_price mean_wage ///
	land_plurality_caste pop_plurality_caste reddy_land                    ///
	low_land_inequality dominant_land_to_pop

	// Add a v_ prefix to all variable names, then restore the location variables
rename * v_*
rename (v_village v_state v_district v_subdist) (village state district subdist)

	// Add the same v_ prefix to the value labels (elabel is a community-contributed
	// command and must be installed), then restore the consent and district label names
elabel rename * v_*
elabel rename v_consent consent
elabel rename v_district district

********************************************************************************

	// Write the cleaned village file and empty the data in memory
save "$data/Village Survey V2_clean.dta", replace

clear
